#!/usr/bin/env perl
# Copyright (c) 2006-2016 Cisco Systems, Inc.  All rights reserved.
# Copyright (c) 2020      Amazon.com, Inc. or its affiliates.
#                         All Rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Common symbols cause linking issues on some platforms, including OS X.  See
# this issue for more background:
#   https://github.com/open-mpi/ompi/issues/375

use strict;
use warnings;

use Getopt::Long;
use File::Basename qw(basename);
use File::Find;
use Data::Dumper;

# Forward declaration; the definition appears near the end of this file.
sub is_allowlisted;

# Symbol count at which --brief mode prints its "skipping remaining symbols"
# notice.
my $MAX_BRIEF = 10;

# Copy of the command line taken before option parsing, used to print a
# "re-run without --brief" hint.
my @orig_argv = @ARGV;
# Symbol names read from common_sym_allowlist.txt files; consulted by
# is_allowlisted().
my @sym_allowlist = ();

# Print the command-line synopsis to STDERR and terminate with exit status 1.
# Called when option parsing fails or a required option is missing.
sub usage {
    print STDERR <<EOT;
Usage: $0 --top_builddir=BUILDDIR --top_srcdir=SRCDIR [--objext=OBJEXT] [--brief] [--full-path]
[--skipdir=DIR] [--all]
Searches for all ".OBJEXT" files in BUILDDIR and checks for the existence of
common symbols.  Common symbols are problematic for some platforms, including
OS X.  Skipdir can be specified multiple times.

OBJEXT defaults to 'o' if not specified.

By default, symbols listed in common_sym_allowlist.txt files found under
SRCDIR (and Fortran-style names ending in an underscore) are not reported;
--all reports them as well.
EOT
    exit 1;
}

# Option state, filled in by GetOptions() below.
my ($all, $brief, $print_full_obj_path) = (0, 0, 0);
my ($top_builddir, $top_srcdir) = ('', '');
my $objext = 'o';
my @skipdirs;

# Running count of common symbols reported so far; decides the exit status.
my $n = 0;

usage() unless GetOptions(
    "all!"           => \$all,
    "brief!"         => \$brief,
    "full-path!"     => \$print_full_obj_path,
    "objext=s"       => \$objext,
    "top_builddir=s" => \$top_builddir,
    "top_srcdir=s"   => \$top_srcdir,
    "skipdir=s"      => \@skipdirs,
);

# Both directory options are mandatory.
usage() unless ($top_builddir and $top_srcdir);

# Without nm there is nothing to inspect, so bail out early.
if (system("command -v nm >/dev/null 2>&1") != 0) {
    print STDERR "NOTE: nm not found, skipping common symbol check\n";
    # Makefile usage should prefix this command with "-" to ignore this exit status
    exit 1;
}

# load the common symbol allowlist from files scattered around the codebase
#
# It would be better to load these into some sort of tree and then have those
# allowlists only apply to objects that are found in the same directory or
# subdirectories.  That way an allowlisted symbol in one component doesn't
# "shadow" a symbol that should not be allowlisted in another component.  If we
# find this is actually a problem in practice then we can write a v2 update.
# File::Find "wanted" callback: load one allowlist file.
#
# File::Find calls this with $_ set to the current entry's name and
# $File::Find::name set to its path.  Entries not named exactly
# "common_sym_allowlist.txt" are ignored.  For a matching file, every line
# that is neither blank nor a "#" comment is appended to @sym_allowlist with
# surrounding whitespace (including a stray CR) removed.
#
# Dies if a matching file cannot be opened.
sub allowlist_find {
    # Exact string comparison: a regex with an unescaped "." would also
    # accept names such as "common_sym_allowlistXtxt".
    return if $_ ne 'common_sym_allowlist.txt';

    open(my $fh, "<", $_) or die "Can't open $File::Find::name: $!";
    while (my $line = <$fh>) {
        chomp $line;
        next if ($line =~ /^\s*#/); # skip comments
        next if ($line =~ /^\s*$/); # skip blank lines

        # Untrimmed entries (indentation, trailing blanks, CRLF files) would
        # never match a symbol name later on.
        $line =~ s/^\s+|\s+$//g;
        push @sym_allowlist, $line;
    }
    close($fh);
}
# Walk the source tree and collect every allowlist file found in it.
File::Find::find(\&allowlist_find, $top_srcdir);

# File::Find "wanted" callback: report common symbols in one object file.
#
# File::Find calls this with $_ set to the current entry's name and
# $File::Find::name set to its path.
#
#   * Entries whose name equals one of @skipdirs are pruned.
#   * Entries not named "*.$objext" are ignored.
#   * Every other entry is run through nm; each output line that marks a
#     common symbol (type letter "C" or "c") and is not filtered out by
#     is_allowlisted() is counted in $n and written to STDERR.  --all
#     disables the allowlist filter.
#
# With --brief, reporting stops once $n reaches $MAX_BRIEF: a hint on how to
# see the full list is printed once, and later entries are skipped without
# running nm again.
sub object_find {
    my $obj = $_;

    # In brief mode, once the limit has been hit nothing more is printed, so
    # neither descend further nor run nm again.
    if ($brief and $n >= $MAX_BRIEF) {
        $File::Find::prune = 1;
        return;
    }

    # skip specified directories.  This needs to be before the objext
    # search below, or the prune will be missed and we'll recurse
    # into the directory anyway.
    if (grep { $_ eq $obj } @skipdirs) {
        $File::Find::prune = 1;
        return;
    }

    # skip any files not of form *.$objext; the extension is quoted so that
    # regex metacharacters in it are taken literally
    if ($obj !~ /\.\Q$objext\E\z/) { return; }

    # Escape single quotes so a file name cannot break out of the quoted
    # shell word.
    (my $quoted_obj = $obj) =~ s/'/'\\''/g;

    # Common-symbol lines are selected below in Perl rather than by piping
    # through egrep: egrep is obsolescent (newer GNU grep warns on stderr on
    # every call) and "\s" is not a portable grep construct.
    #
    # The line pattern may not be 100% robust for all implementations of nm.
    # If that turns out to be the case, we can try switching to "nm -P",
    # which is supposed to activate the "portable" (yet ugly) format.  It's
    # also unclear at this point how common support for "nm -P" is.
    my $nm_fh;
    if (!open($nm_fh, '-|', "nm '${quoted_obj}' 2>/dev/null")) {
        warn "Can't run nm on $File::Find::name: $!\n";
        return;
    }

    SYMBOL: while (my $sym_line = <$nm_fh>) {
        # Keep only common symbols.  Requiring a non-blank after the type
        # letter keeps a symbol that is itself named "c" from matching via
        # the trailing newline.
        next SYMBOL if $sym_line !~ /\s[cC]\s+\S/;

        if (!$all and is_allowlisted($sym_line)) {
            next SYMBOL;
        }

        if ($n == 0) {
            print STDERR "WARNING!  Common symbols found:\n";
        }

        $n++;

        if ($brief and $n >= $MAX_BRIEF) {
            print STDERR "[...]\n";
            print STDERR "skipping remaining symbols. To see all symbols, run:\n";
            print STDERR "  " . join(" ", ($0, grep {!/--brief/} @orig_argv)) . "\n";
            # Really stop here; the guard at the top of this sub takes care
            # of all later entries.
            last SYMBOL;
        }

        if ($print_full_obj_path) {
            print STDERR $File::Find::name . ": " . $sym_line;
        } else {
            printf STDERR "%25s: %s", $obj, $sym_line;
        }
    }

    # Reap the nm child.  Its exit status is deliberately ignored: nm fails
    # on non-object files, which is not an error for this scan.
    close($nm_fh);
}
# Walk the build tree; object_find() reports each offending symbol and
# increments $n.
File::Find::find(\&object_find, $top_builddir);

# Exit status 1 means at least one common symbol was reported, 0 means none.
exit($n > 0 ? 1 : 0);

# Decide whether a line of nm output refers to a symbol that should not be
# reported.
#
# Takes one line of nm output.  Returns 1 if the line contains an entry of
# @sym_allowlist as a whole word (optionally preceded by one underscore), or
# contains a "Fortran-shaped" name; returns 0 otherwise.
sub is_allowlisted {
    my ($nm_line) = @_;

    # Look for symbol names ending in one or more underscores and assume they
    # are "Fortran-shaped".  This won't match the hex output from most nm's and
    # shouldn't match the single characters that indicate symbol type.
    return 1 if $nm_line =~ m/\b_?[A-Za-z_]+[A-Za-z0-9_]*_+\b/;

    # Otherwise the symbol must be listed explicitly.
    for my $allowed (@sym_allowlist) {
        return 1 if $nm_line =~ m/\b_?\Q$allowed\E\b/;
    }

    return 0;
}
